%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# Read the pre-processed house feature table from the CSV file in the
# current working directory.
house_feature_df = pd.read_csv(filepath_or_buffer="house_feature_df.csv")
# Import the GeoPandas package to plot latitudes and longitudes on a map
import geopandas as gpd
# Import the Point and Polygon geometry classes from the Shapely package
from shapely.geometry import Point, Polygon
# Load the boundary shapefile that serves as the base layer for every map below.
# NOTE(review): the variable name says King County, Washington; the file name
# alone ("USA_adm1_select2.shp") does not confirm the covered area - verify.
KingCounty_Washington_map = gpd.read_file("USA_adm1_select2.shp")

# Draw the base map on its own on a very large (100 x 100 inch) canvas.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(100, 100))
KingCounty_Washington_map.plot(ax=ax, color='grey', alpha=0.4)
# Build one shapely Point per house. Shapely expects (x, y) order, i.e.
# (longitude, latitude), which is why `long` comes first.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(house_feature_df['long'], house_feature_df['lat'])
]

# Wrap the feature table in a GeoDataFrame that uses those points as geometry.
# No CRS is assigned; an earlier variant that set one is kept here, disabled:
#   crs = {'init': 'epsg:4326'}
#   house_feature_df_Geo = gpd.GeoDataFrame(house_feature_df, crs=crs, geometry=geometry)
house_feature_df_Geo = gpd.GeoDataFrame(data=house_feature_df, geometry=geometry)
# Map of all houses with the high-priced ones (price > 1128000) highlighted.
# NOTE(review): this uses a strict `>` while the Premium_House flag further
# down uses `>=`; confirm which boundary is intended.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(100, 100))
KingCounty_Washington_map.plot(ax=ax, color='grey', alpha=0.9)

# Layer 1: every house as a red dot.
house_feature_df_Geo.geometry.plot(
    ax=ax,
    marker='.',
    color='Red',
    markersize=500,
    alpha=.5,
)

# Layer 2: houses above the price cut-off as hollow green squares.
house_feature_df_Geo.loc[house_feature_df_Geo['price'] > 1128000].geometry.plot(
    ax=ax,
    marker="s",
    color='green',
    facecolors="None",
    markersize=1500,
    alpha=.7,
    label='x',
)

plt.title('Arial View of High Cost Houses Spread', fontsize=150)
plt.legend()
from geopy.geocoders import Nominatim
# Reverse-geocode two coordinates of interest and print their street addresses.
# Nominatim requires an identifying User-Agent: an empty string makes geopy
# fall back to its default agent, which recent geopy versions reject with a
# ConfigurationError for the public Nominatim server.
geolocator = Nominatim(user_agent="king_county_house_price_analysis")
for coordinates in ("47.5306, -122.134", "47.6425, -122.406"):
    location = geolocator.reverse(coordinates)
    # reverse() returns None when nothing is found; guard before `.address`.
    if location is None:
        print("No address found for " + coordinates)
    else:
        print(location.address)
# Flag premium houses: 1 when price >= 1128000, otherwise 0 (a missing price
# compares False and therefore yields 0).
# A single vectorised comparison is used instead of a row-by-row chained
# assignment (`df[col][i] = ...`), which pandas may apply to a temporary copy
# and which depends on the index being 0..n-1.
rec_count = house_feature_df.shape[0]
house_feature_df['Premium_House'] = (
    house_feature_df['price'] >= 1128000
).astype(np.int64)
house_feature_df.head(1)
# Map the houses whose `condition` value is 3, 4 or 5, one colour per value.
fig, ax = plt.subplots(figsize=(100, 100))
KingCounty_Washington_map.plot(color='grey', ax=ax, alpha=0.9)
# All houses are drawn with colour 'None' (invisible).
# NOTE(review): presumably this only fixes the axes extent to the full data set - confirm.
house_feature_df_Geo.geometry.plot(marker='.', color='None', ax=ax, alpha=.5, markersize=500)
# Each condition value gets its own legend label so the entries can be told
# apart; marker sizes shrink so later layers do not hide earlier ones.
for condition_value, edge_color, size in ((3, 'blue', 100), (4, 'green', 50), (5, 'black', 10)):
    house_feature_df_Geo[house_feature_df_Geo['condition'] == condition_value].geometry.plot(
        marker="s",
        color=edge_color,
        ax=ax,
        label='Condition {}'.format(condition_value),
        facecolors="None",
        alpha=.7,
        markersize=size,
    )
plt.title('Arial View of Top 3 Rated Housing Categories', fontsize=150)
# Same legend font size as the other maps so it is readable on the 100x100 canvas.
plt.legend(fontsize=60)
# Map the houses whose `quality` value is 7 or 8 on top of all houses.
fig, ax = plt.subplots(figsize=(100, 100))
KingCounty_Washington_map.plot(color='grey', ax=ax, alpha=0.9)
# Every house as a red dot (background layer).
house_feature_df_Geo.geometry.plot(marker='.', color='Red', ax=ax, alpha=.5, markersize=500)
# One filled-square layer per quality value, each with a distinct legend label
# so the two entries can be told apart.
for quality_value, fill_color, size in ((7, 'blue', 100), (8, 'green', 10)):
    house_feature_df_Geo[house_feature_df_Geo['quality'] == quality_value].geometry.plot(
        marker="s",
        color=fill_color,
        ax=ax,
        label='Quality {}'.format(quality_value),
        facecolors=fill_color,
        alpha=1,
        markersize=size,
    )
plt.title('Arial View of Top 2 Quality Housing areas', fontsize=150)
plt.legend(fontsize=60)
# Map highlighting the houses whose `coast` flag equals 1.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(100, 100))
KingCounty_Washington_map.plot(ax=ax, color='grey', alpha=0.9)

# Background layer: every house as a tiny red dot.
house_feature_df_Geo.geometry.plot(
    ax=ax,
    marker='.',
    color='red',
    markersize=1,
    alpha=.9,
)

# Foreground layer: coast == 1 houses as blue squares.
house_feature_df_Geo.loc[house_feature_df_Geo['coast'] == 1].geometry.plot(
    ax=ax,
    marker="s",
    color='blue',
    markersize=100,
    alpha=1,
    label='house_with Water Front ',
)

plt.title('Arial View of the KingCount Houses facing the Water Front', fontsize=150)
plt.legend(fontsize=60)
import pylab
# Map of all houses with the ones that are NOT on the coast (coast == 0) overlaid.
fig, ax = plt.subplots(figsize=(100, 100))
KingCounty_Washington_map.plot(color='grey', ax=ax, alpha=0.4)
# Every house as a red triangle.
house_feature_df_Geo.geometry.plot(marker='^', color='red', ax=ax, label='Geometry', alpha=.5, markersize=75)
# Houses without a coast (coast == 0) as hollow green squares. The label says
# "without" so that it matches the filter if a legend is added.
house_feature_df_Geo[house_feature_df_Geo['coast'] == 0].geometry.plot(
    marker="s",
    color='green',
    ax=ax,
    label='house_withoutCoast',
    facecolors="None",
    alpha=.5,
    markersize=100,
)
# NOTE(review): no overlay for houses that do have a coast (coast == 1) is
# drawn on this figure, and no title or legend is set.
# Remove the plotting-only 'geometry' column before saving.
# Whether building the GeoDataFrame also adds 'geometry' to this frame appears
# to depend on the pandas/geopandas version (TODO confirm), so a missing
# column is tolerated instead of raising KeyError.
house_feature_df = house_feature_df.drop(['geometry'], axis=1, errors='ignore')
# Store the premium flag as a 64-bit integer column.
house_feature_df['Premium_House'] = house_feature_df['Premium_House'].astype(np.int64)
# Write the updated table back over the input file.
# NOTE(review): the row index is written as an extra leading column, and this
# same file is read at the top of the script without `index_col`, so repeated
# runs presumably accumulate 'Unnamed: 0' columns. Left unchanged because
# downstream readers may rely on the current layout - consider index=False.
house_feature_df.to_csv('house_feature_df.csv')